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APPENDIX A: Document Type Definition of Multimedia Integration Description 
Scheme 

MMintegrationds.dtd 
<!-- Multimedia Integration Description Scheme -->

<!-- MM_Stream holds exactly one MM_Object_Set, followed by any number of
     Object_Hierarchy elements and then any number of Entity_Relation_Graph
     elements. The id attribute is optional. -->
<!ELEMENT MM_Stream ( MM_Object_Set, Object_Hierarchy*,
                      Entity_Relation_Graph* )>
<!ATTLIST MM_Stream
    id ID #IMPLIED>

<!-- This is how external DTDs are included in the current DTD: each one is
     declared as a parameter entity with a SYSTEM identifier and then
     referenced immediately after its declaration. -->

<!-- External Video DS DTD -->

<!ENTITY % Video_DS SYSTEM "video_ds.dtd">

%Video_DS;

<!-- External Audio DS DTD -->

<!ENTITY % Audio_DS SYSTEM "audio_ds.dtd">

%Audio_DS;

<!-- External Text DS DTD -->

<!ENTITY % Text_DS SYSTEM "text_ds.dtd">

%Text_DS;

<!-- External Synthetic DS DTD -->

<!ENTITY % Synthetic_DS SYSTEM "synthetic_ds.dtd">

%Synthetic_DS;

<!-- External Image DS DTD -->

<!ENTITY % Image_DS SYSTEM "image_ds.dtd">

%Image_DS;

<!-- MM_Object_Set: one or more MM_Object elements. -->
<!ELEMENT MM_Object_Set ( MM_Object+ )>

<!-- MM_Object: a Media_Object_Set, then any number of Object_Hierarchy and
     Entity_Relation_Graph elements, then up to one each of the media,
     semantic and temporal feature groups, in that order. -->
<!ELEMENT MM_Object ( Media_Object_Set, Object_Hierarchy*,
                      Entity_Relation_Graph*,
                      MM_Obj_Media_Features?, MM_Obj_Semantic_Features?,
                      MM_Obj_Temporal_Features? )>

<!-- Object_Type is mandatory and restricted to LOCAL or GLOBAL; the
     identifier and the three reference attributes are optional. -->
<!ATTLIST MM_Object
    Object_Type    (LOCAL|GLOBAL)  #REQUIRED
    id             ID              #IMPLIED
    ObjectRef      IDREF           #IMPLIED
    ObjectNodeRef  IDREFS          #IMPLIED
    EntityNodeRef  IDREFS          #IMPLIED>



<!-- Media_Object_Set: one or more media objects, each being an audio, image,
     video, text or synthetic object, in any order. -->
<!ELEMENT Media_Object_Set ( Audio_Object | Image_Object | Video_Object |
                             Text_Object | Synthetic_Object )+ >

<!-- The object hierarchy and the entity relation graph are defined in the Image DS
     (Proposal #480). We include them in this DTD for convenience. -->

<!-- Object hierarchy element -->

<!-- The attribute type is the hierarchy binding type -->

<!-- Object_Hierarchy contains exactly one top-level Object_Node. -->
<!ELEMENT Object_Hierarchy ( Object_Node )>

<!ATTLIST Object_Hierarchy
    id    ID     #IMPLIED
    type  CDATA  #IMPLIED>

<!-- Object_Node nests any number of child Object_Node elements; each node
     must carry an Object_Ref IDREF. -->
<!ELEMENT Object_Node ( Object_Node* )>
<!ATTLIST Object_Node
    id          ID     #IMPLIED
    Object_Ref  IDREF  #REQUIRED>

<!-- Entity relation graph element -->

<!-- Possible types of entity relations and entity relation graphs:

     - Spatial: topological, directional

     - Temporal: topological, directional

     - Semantic -->

<!-- Entity_Relation_Graph: one or more Entity_Relation elements. -->
<!ELEMENT Entity_Relation_Graph ( Entity_Relation+ )>
<!ATTLIST Entity_Relation_Graph
    id    ID     #IMPLIED
    type  CDATA  #IMPLIED>

<!-- Entity_Relation: one Relation followed by any mix of entity nodes,
     entity node sets and nested entity relations. -->
<!ELEMENT Entity_Relation ( Relation, (Entity_Node | Entity_Node_Set |
                            Entity_Relation)* )>

<!ATTLIST Entity_Relation
    type  CDATA  #IMPLIED>

<!-- Entity_Node: character data; must carry an Object_Ref IDREF. -->
<!ELEMENT Entity_Node (#PCDATA)>
<!ATTLIST Entity_Node
    id          ID     #IMPLIED
    Object_Ref  IDREF  #REQUIRED>

<!-- Entity_Node_Set: one or more Entity_Node elements. -->
<!ELEMENT Entity_Node_Set ( Entity_Node+ )>
<!-- Relation: any number of temporal, spatial or semantic relation elements
     (or code elements), in any order. -->
<!ELEMENT Relation ( Temporal_Parallel | Temporal_Sequential |
                     Spatial_Alignment | Spatial_Arrangement |
                     Semantic_Relation | code )*>

<!ATTLIST Relation
    With_Respect_To  IDREF  #IMPLIED >

<!ELEMENT Temporal_Parallel EMPTY >
<!ELEMENT Temporal_Sequential EMPTY >
<!-- Pattern defaults to EXACT when omitted. -->
<!ATTLIST Temporal_Sequential
    Pattern  (EXACT|DELAY|PRIOR)  "EXACT" >

<!-- Spatial_Alignment: empty element; Pattern defaults to Left_Align. -->
<!ELEMENT Spatial_Alignment EMPTY >
<!ATTLIST Spatial_Alignment
    Pattern  (Left_Align | Right_Align |
              Top_Align | Bottom_Align)  "Left_Align"
    At_Time  CDATA  #IMPLIED >

<!-- Spatial_Arrangement: at most one of Spatial_Relevance or
     Spatial_Positioning. -->
<!ELEMENT Spatial_Arrangement ( Spatial_Relevance | Spatial_Positioning )? >
<!ATTLIST Spatial_Arrangement
    At_Time  CDATA  #IMPLIED >

<!-- Spatial_Relevance: empty element; Pattern defaults to Top_Of.
     NOTE(review): the spelling of Contained_in was reconstructed from
     damaged text; confirm the capitalisation against the original listing. -->
<!ELEMENT Spatial_Relevance EMPTY >
<!ATTLIST Spatial_Relevance
    Pattern  (Top_Of | Bottom_Of | Left_Of | Right_Of |
              Upper_Left_Of | Upper_Right_Of | Lower_Left_Of |
              Lower_Right_Of |
              Adjacent_To | Neighboring_To | Near_By |
              Within | Contained_in)  "Top_Of" >

<!ELEMENT Spatial_Positioning EMPTY >

<!ATTLIST Spatial_Positioning
    Horizontal_Shift  CDATA  #IMPLIED
    Vertical_Shift    CDATA  #IMPLIED >

<!-- Semantic_Relation: optionally either a run of Keywords elements or a run
     of code elements. -->
<!ELEMENT Semantic_Relation (Keywords* | code*)? >



<!-- MM_Obj_Media_Features: optional data location, scalable representation
     and modality transcoding, in that order.
     NOTE(review): Modality_Transcoding is not declared in this listing; the
     underscore was restored to match the naming used by every other
     multi-word element here. Confirm against the declaring DTD. -->
<!ELEMENT MM_Obj_Media_Features ( Data_Location?, Scalable_Representation?,
                                  Modality_Transcoding? )>

<!-- MM_Obj_Semantic_Features: optional text annotation, then optional keywords. -->
<!ELEMENT MM_Obj_Semantic_Features ( Text_Annotation?, Keywords? )>

<!-- MM_Obj_Temporal_Features: optional Duration. -->
<!ELEMENT MM_Obj_Temporal_Features ( Duration? )>



<!-- Keywords: any number of Word elements followed by any number of Code
     elements. -->
<!ELEMENT Keywords ( Word*, Code* ) >

<!-- NOTE(review): this attribute list was reconstructed from heavily damaged
     text. The attribute name No_Words, its CDATA type and its #REQUIRED
     default are best readings; the Language default and the
     Extraction_Manner values follow the parallel Text_Abstract attribute
     list. Confirm all four against the original listing. -->
<!ATTLIST Keywords
    No_Words           CDATA               #REQUIRED
    Language           CDATA               "English"
    Extraction_Manner  (AUTOMATIC|MANUAL)  "MANUAL" >




<!-- Duration: optional per-medium durations in the fixed order image, audio,
     video, text, synthetic. -->
<!ELEMENT Duration ( Image_Duration?,
                     Audio_Duration?,
                     Video_Duration?,
                     Text_Duration?,
                     Synthetic_Duration? ) >

<!ATTLIST Duration
    Synchronized_Overall_Duration  CDATA  #IMPLIED >


<!-- Each of the image, audio, video and synthetic durations wraps exactly
     one Time element. -->
<!ELEMENT Image_Duration ( Time ) >

<!ELEMENT Audio_Duration ( Time ) >

<!ELEMENT Video_Duration ( Time ) >

<!ELEMENT Synthetic_Duration ( Time ) >

<!-- Text_Duration: either an optional Time or an optional Alignment. -->
<!ELEMENT Text_Duration ( Time? | Alignment? ) >

<!-- Alignment: empty element; With names the medium and defaults to AUDIO. -->
<!ELEMENT Alignment EMPTY >

<!ATTLIST Alignment
    With  (IMAGE|AUDIO|VIDEO|SYNTHETIC)  "AUDIO" >


<!-- Data_Location: optional per-medium locations in the fixed order image,
     audio, video, text, synthetic. -->
<!ELEMENT Data_Location ( Image_Location?,
                          Audio_Location?,
                          Video_Location?,
                          Text_Location?,
                          Synthetic_Location? ) >

<!-- Each per-medium location wraps exactly one location element. -->
<!ELEMENT Image_Location (location) >

<!ELEMENT Audio_Location (location) >

<!ELEMENT Video_Location (location) >

<!ELEMENT Text_Location (location) >

<!ELEMENT Synthetic_Location (location) >




<!-- Scalable_Representation: optional Static_Sampled, then optional
     Dynamic_Condensed. -->
<!ELEMENT Scalable_Representation ( Static_Sampled?,
                                    Dynamic_Condensed? ) >

<!-- Static_Sampled: optional condensed image, video static pictures, audio
     clips and synthetic pictures, in that order. -->
<!ELEMENT Static_Sampled ( Image_Condensed?,
                           Video_Static_Pictures?,
                           Audio_Clips?,
                           Synthetic_Pictures? ) >
<!-- Image_Condensed: any number of Location, then Image_Scl, then
     Image_Subsampling elements. -->
<!ELEMENT Image_Condensed ( Location*, Image_Scl*, Image_Subsampling* ) >
<!-- Image_Subsampling: repeated pairs of a parameter element and a code element. -->
<!ELEMENT Image_Subsampling ( Image_Subsampling_Para, code )* >
<!ELEMENT Image_Subsampling_Para EMPTY >
<!ATTLIST Image_Subsampling_Para
    Scheme        CDATA  #REQUIRED
    Spatial_Rate  CDATA  #REQUIRED
    Frame_Size    CDATA  #IMPLIED >

<!-- NOTE(review): the three attribute lists below were printed without an
     element name, which is not a valid ATTLIST declaration. Each has been
     attached to the element declared immediately before it; confirm that
     this is the intended owner. -->
<!ELEMENT Video_Static_Pictures ( Key_Frame* ) >
<!ATTLIST Video_Static_Pictures
    No_KFs  CDATA  #REQUIRED >

<!ELEMENT Audio_Clips ( Audio_Object* | Audio_Hierarchy* ) >
<!ATTLIST Audio_Clips
    No_Clips  CDATA  #REQUIRED >

<!ELEMENT Synthetic_Pictures ( Key_Frame )* >
<!ATTLIST Synthetic_Pictures
    No_KFs  CDATA  #REQUIRED >

<!-- Dynamic_Condensed: optional visual, audio, text and synthetic condensed
     representations, in that order. -->
<!ELEMENT Dynamic_Condensed (
    Visual_Condensed?,
    Audio_Condensed?,
    Text_Condensed?,
    Synthetic_Condensed? ) >

<!-- Visual_Condensed: any number of Location, then Video_Scl, then
     Video_Subsampling elements. -->
<!ELEMENT Visual_Condensed ( Location*, Video_Scl*, Video_Subsampling* ) >
<!-- Video_Subsampling: repeated pairs of a parameter element and a code element. -->
<!ELEMENT Video_Subsampling ( Video_Subsampling_Para, code )* >
<!ELEMENT Video_Subsampling_Para EMPTY >
<!ATTLIST Video_Subsampling_Para
    Scheme         CDATA  #REQUIRED
    Temporal_Rate  CDATA  #IMPLIED
    Spatial_Rate   CDATA  #IMPLIED
    Frame_Size     CDATA  #IMPLIED >
<!-- Audio_Condensed: any number of Location, then compressed, subsampled and
     time-scaled variants, in that order. -->
<!ELEMENT Audio_Condensed ( Location*,
                            Audio_Compressed*,
                            Audio_Subsampling*,
                            Audio_Timescaled* ) >

<!-- Each variant below is a repeated pair of a parameter element and a code
     element; the parameter elements are empty and carry only attributes. -->
<!ELEMENT Audio_Compressed ( Audio_Compress_Para, code )* >
<!ELEMENT Audio_Compress_Para EMPTY >
<!ATTLIST Audio_Compress_Para
    Scheme   CDATA  #REQUIRED
    Bitrate  CDATA  #IMPLIED >

<!ELEMENT Audio_Subsampling ( Audio_Subsampling_Para, code )* >
<!ELEMENT Audio_Subsampling_Para EMPTY >
<!ATTLIST Audio_Subsampling_Para
    Scheme         CDATA  #REQUIRED
    Temporal_Rate  CDATA  #IMPLIED >

<!ELEMENT Audio_Timescaled ( Audio_Timescale_Para, code )* >
<!ELEMENT Audio_Timescale_Para EMPTY >
<!ATTLIST Audio_Timescale_Para
    Scale_Rate  CDATA  #REQUIRED >




<!-- Text_Condensed: any number of Text_Abstract elements. -->
<!ELEMENT Text_Condensed ( Text_Abstract* ) >

<!-- Text_Abstract: optionally, any number of Location elements followed by
     repeated pairs of a parameter element and a code element. -->
<!ELEMENT Text_Abstract ( Location*, (Text_Abstract_Para, code)* )? >

<!-- Language defaults to English and Generation_Mode to MANUAL. -->
<!ATTLIST Text_Abstract
    Length_In_Words      CDATA               #IMPLIED
    Duration_In_Seconds  CDATA               #IMPLIED
    Language             CDATA               "English"
    Generation_Mode      (AUTOMATIC|MANUAL)  "MANUAL" >

<!ELEMENT Text_Abstract_Para EMPTY >

<!ATTLIST Text_Abstract_Para
    Length_In_Words  CDATA  #IMPLIED
    Language         CDATA  "English" >




<!-- Synthetic_Condensed: any number of Synthetic_Location elements followed
     by repeated pairs of a parameter element and a code element. -->
<!ELEMENT Synthetic_Condensed ( Synthetic_Location*, (Synthetic_Condense_Para,
                                code)* ) >

<!-- Synthetic_Condense_Para: empty element; the two rates and the frame size
     are mandatory, the bitrate is optional. -->
<!ELEMENT Synthetic_Condense_Para EMPTY >

<!ATTLIST Synthetic_Condense_Para
    Spatial_Rate   CDATA  #REQUIRED
    Temporal_Rate  CDATA  #REQUIRED
    Frame_Size     CDATA  #REQUIRED
    Bitrate        CDATA  #IMPLIED >

<!-- Multimedia Integration DS End -->





